
library(pacman)
p_load(
  tidyverse, lubridate, httr, tidyjson, readxl, openxlsx, purrr
)
# tidyverse - Core packages used below include:
#   tidyr - Help create tidy data
#   stringr - Make working with strings easier (str_glue())
#   readr - Read csv, txt, etc.
#   tibble - Wrapper for data.frames that makes things easier (e.g. never converts strings to factors)
# lubridate - Make working with dates easier
# httr - Perform HTTP requests (in this case used to get data from the FACT API)
# tidyjson - Work with JSON objects in a tidy way. Useful for highly nested objects and "ragged" arrays and/or objects (varying lengths by document)
# readxl - Read data from xlsx files via read_excel()
# openxlsx - Create and write to formatted xlsx documents
# purrr - Use of partial() and map() functions

# FACT API key, read from the FACT_KEY environment variable
api_key <- Sys.getenv("FACT_KEY")
# Sys.getenv() returns "" for an unset variable; stop here with a clear
# message instead of sending a request that can only be rejected.
if (!nzchar(api_key)) {
  stop(
    "The FACT_KEY environment variable is not set; ",
    "it must hold a valid EPA FACT API key.",
    call. = FALSE
  )
}
# The EIA data year (available at https://www.eia.gov/electricity/data/eia860/)
eia_860_year <- 2018
# All units retired before this year are filtered out of the data
earliest_retirement_year <- 2010
# URL of the archived EIA-860 zip file for the selected data year
eia_data_file <- str_glue(
  "https://www.eia.gov/electricity/data/eia860/archive/xls/eia860{eia_860_year}.zip"
)
# URL of the EPA/EIA plant ID crosswalk workbook (per the file name)
egrid_data_file <- "https://www.epa.gov/sites/production/files/2020-09/epa-eia_plant_id_crosswalk.xlsx"

# Request the full facility list from the EPA FACT API. The key is passed via
# `query` so httr URL-encodes it rather than pasting it into the URL verbatim.
response <- GET(
  "https://api.epa.gov/FACT/1.0/facilities",
  query = list(api_key = api_key)
)
# Abort on any HTTP error status. The API's own error message is attached when
# the error body can be parsed; if it cannot (e.g. a non-JSON error page), fall
# back to httr's default message instead of failing inside the error handler.
stop_for_status(
  response,
  task = tryCatch(
    content(response, as = "parsed", encoding = "UTF-8")$error$message,
    error = function(e) NULL
  )
)
# Read the body as text with an explicit encoding (avoids httr guessing), then
# step into the top-level "data" object: an array of all the plants/oris
camd_plants_json <- content(response, as = "text", encoding = "UTF-8") %>%
  enter_object("data")
# One row per element of the plant array, with each plant's scalar fields
# spread into columns
camd_plants <- spread_all(gather_array(camd_plants_json))
# Step into every plant's "units" array: one row per unit, with the unit's
# scalar fields spread into columns alongside the plant columns
camd_combustion_units <- spread_all(
  gather_array(enter_object(camd_plants, "units"))
)
# Keep only the units relevant to the chosen EIA data year:
#   * operating units (OPR) whose status date is on or before 31 Dec of
#     eia_860_year, i.e. drop units that started operating after the data year
#   * retired / long-term cold storage units (RET, LTCS) whose status date is
#     on or after 1 Jan of earliest_retirement_year
# Rows with any other status, or with a missing status date, are dropped.
# Both sides of each comparison are Date objects, so the result does not rely
# on implicit string-to-Date coercion, and statusDate is parsed only by
# as.Date() (the former ymd() wrapper re-parsed an existing Date).
camd_combustion_units <- camd_combustion_units %>%
  filter(
    (status == "OPR" &
      as.Date(statusDate) <= make_date(eia_860_year, 12, 31)) |
      (status %in% c("RET", "LTCS") &
        as.Date(statusDate) >= make_date(earliest_retirement_year, 1, 1))
  )
# Unit-to-generator lookup: one row per entry of each unit's "generators"
# array, reduced to the plant/unit keys plus the generator ID and capacity
camd_generators <- camd_combustion_units %>%
  enter_object("generators") %>%
  gather_array() %>%
  spread_all() %>%
  select(orisCode, unitId, generatorId, nameplateCapacity) %>%
  as_tibble()
# Get the primary fuel description for each unit: one row per entry of the
# unit's "fuels" array whose indicatorDescription is "Primary".
# filter() replaces base subset() so the pipeline uses dplyr verbs throughout;
# like subset(), it drops rows where the condition is NA.
# NOTE(review): a unit listing more than one "Primary" fuel would produce
# several rows here - confirm the API never does that.
camd_fuels <- camd_combustion_units %>%
  enter_object("fuels") %>%
  gather_array() %>%
  spread_all() %>%
  filter(indicatorDescription == "Primary") %>%
  select(orisCode, unitId, fuelDesc) %>%
  as_tibble()
# Assemble the final units table: attach generator and primary-fuel columns to
# every unit (matched on plant code + unit ID), keep and rename the columns of
# interest, derive the retirement year, and sort by plant and unit
camd_unit <- camd_combustion_units %>%
  as_tibble() %>%
  left_join(camd_generators, by = c("orisCode", "unitId")) %>%
  left_join(camd_fuels, by = c("orisCode", "unitId")) %>%
  select(
    orispl_code = orisCode,
    camd_facility_name = name,
    camd_state = state.abbrev,
    lat = geographicLocation.latitude,
    lon = geographicLocation.longitude,
    unitid = unitId,
    fuel_type = fuelDesc,
    camd_generator_id = generatorId,
    camd_namepcap = nameplateCapacity,
    camd_status = status,
    camd_status_date = statusDate
  ) %>%
  mutate(
    # Operating units get 0; every other status gets the year of its status date
    camd_retire_year = ifelse(
      camd_status == "OPR",
      0,
      year(as.Date(camd_status_date))
    )
  ) %>%
  arrange(orispl_code, unitid)
# Remove the intermediate objects that are no longer needed
rm(
  camd_plants_json, camd_plants, camd_fuels,
  camd_generators, camd_combustion_units, response
)
# Leave the assembled units table as the value of this section
camd_unit